Introduction

The current version re-run the codes of the data analysis.

# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))

Load packages

library(tidyverse) # package for data cleaning and plotting
library(readxl)
library(modelsummary)
library(ordinal) # package for ordinal logit regression
library(brant) # brant test for the parallel assumption for ordered logit
library(MASS) # models that work with the brant test
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
library(lubridate) # working with dates
library(marginaleffects) #to calculate marginal effects
library(gt) # to format tables
library(here) # work with directory
set.seed(5432)

Merging CQC and financial data

# import location level full data
rating<- read_csv(here("cleaned_data","cic_all_ratings_2019.csv"))
finance <- read_csv(here("cleaned_data","cls_finance_2025.csv"))
finance1 <- finance %>% 
  mutate(id_digit = as.numeric(str_extract(project_id, "\\d+"))) %>%
  arrange(id_digit) %>% 
  mutate(
    current_ratio = ifelse(currentliabilities == 0, NA, currentasset / currentliabilities),
    dissolved = factor(dissolved))
# checking the largest number for the project_id in the finance data set 
summary(finance1$id_digit)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0   228.0   460.0   610.1   913.2  5181.0
#merging the data
cic2019 <- rating %>% 
  left_join(finance1, by = "project_id")

data cleanning

#select relevant columns, rename and relabel 
cic_cleaned <- cic2019 %>% 
  # recode legal form types to be more readable / easier to present
  mutate(# inherited = ifelse(inherited == "Y", TRUE, FALSE),
         rating = recode(rating, 
                         "Insufficient evidence to rate" = "NA",
                         "Requires improvement" = "Req improv"),
         date = ymd(publication_date)) %>% 
  
  # assign order in the rating levels
  mutate(rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding")),
         social_care = ifelse(type == "Social Care Org", "social care", "healthcare")) %>% 
  
  
  # creating a new dummy variable for facility category
  mutate(founded = as.numeric(founded),
         year = year(date),
         age = year - founded, 
         Year = factor(year)) %>% 
  mutate(cls = ifelse(CLS == 1, "CLS", "CLG"),
         totalequity = as.numeric(totalequity),
         totalequity_std = scale(totalequity, center = TRUE, scale = TRUE)) %>% 
  
   mutate(share_size = case_when(
    totalissueshares <= 20 ~ "small",
    totalissueshares >= 100000 ~ "large",
    TRUE ~ NA_character_
  ))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `founded = as.numeric(founded)`.
## Caused by warning:
## ! NAs introduced by coercion

Check the distribution of the data for share_size

cic_cleaned %>% 
  count(share_size)
## # A tibble: 3 × 2
##   share_size     n
##   <chr>      <int>
## 1 large         23
## 2 small        464
## 3 <NA>        1409

Total counts in full data set

count_full <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name)-1,
            count_overall_rating = sum(overall),
            count_rating = n()) 

count_full
## # A tibble: 1 × 4
##   count_provider count_location count_overall_rating count_rating
##            <int>          <dbl>                <dbl>        <int>
## 1            103            170                  551         1896
count_by_form <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  group_by(cls) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name)-1,
            count_overall_rating = sum(overall),
            count_rating = n()) 

count_by_form
## # A tibble: 3 × 5
##   cls   count_provider count_location count_overall_rating count_rating
##   <chr>          <int>          <dbl>                <dbl>        <int>
## 1 CLG               47             56                  104          527
## 2 CLS               56            114                  447         1354
## 3 <NA>               3              2                    0           15
count_by_level_form <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  group_by(cls, level) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name), 
 # the count_location for provider should be manually adjusted to 0
            count_overall_rating = sum(overall),
            count_rating = n()) 
## `summarise()` has grouped output by 'cls'. You can override using the `.groups`
## argument.
count_by_level_form
## # A tibble: 5 × 6
## # Groups:   cls [3]
##   cls   level    count_provider count_location count_overall_rating count_rating
##   <chr> <chr>             <int>          <int>                <dbl>        <int>
## 1 CLG   location             46             56                   75          353
## 2 CLG   provider              3              1                   29          174
## 3 CLS   location             51            114                  395         1042
## 4 CLS   provider             10              1                   52          312
## 5 <NA>  location              3              3                    0           15
count_by_level <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  group_by(level) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name), 
 # the count_location for provider should be manually adjusted to 0
            count_overall_rating = sum(overall),
            count_rating = n()) 

count_by_level
## # A tibble: 2 × 5
##   level    count_provider count_location count_overall_rating count_rating
##   <chr>             <int>          <int>                <dbl>        <int>
## 1 location             97            170                  470         1410
## 2 provider             13              1                   81          486
count_by_level_type <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  group_by(social_care, level) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name), 
 # the count_location for provider should be manually adjusted to 0
            count_overall_rating = sum(overall),
            count_rating = n()) 
## `summarise()` has grouped output by 'social_care'. You can override using the
## `.groups` argument.
count_by_level_type
## # A tibble: 3 × 6
## # Groups:   social_care [2]
##   social_care level    count_provider count_location count_overall_rating
##   <chr>       <chr>             <int>          <int>                <dbl>
## 1 healthcare  location             38             82                  374
## 2 healthcare  provider             13              1                   81
## 3 social care location             61             88                   96
## # ℹ 1 more variable: count_rating <int>
datasummary_crosstab(
  formula = cls ~ rating,
  data = cic_cleaned
)
tinytable_teri27jmghitw0ljwdgq
cls Inadequate Req improv Good Outstanding All
CLG N 13 50 412 52 527
% row 2.5 9.5 78.2 9.9 100.0
CLS N 44 98 1078 134 1354
% row 3.2 7.2 79.6 9.9 100.0
All N 57 151 1502 186 1896
% row 3.0 8.0 79.2 9.8 100.0
datasummary_crosstab(
  formula = cls * spinout ~ rating,
  data = cic_cleaned
)
tinytable_d30yies06ylidy0es72y
cls spinout Inadequate Req improv Good Outstanding All
CLG 0 N 10 27 254 32 323
% row 3.1 8.4 78.6 9.9 100.0
1 N 3 23 158 20 204
% row 1.5 11.3 77.5 9.8 100.0
CLS 0 N 25 46 643 92 806
% row 3.1 5.7 79.8 11.4 100.0
1 N 19 52 435 42 548
% row 3.5 9.5 79.4 7.7 100.0
All N 57 151 1502 186 1896
% row 3.0 8.0 79.2 9.8 100.0

regression analysis

models without finance variable

model_order_overall <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Overall"),
                link = "logit")

model_order_safe <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Safe"),
                link = "logit")
model_order_effective <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Effective"),
                link = "logit")
model_order_caring <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Caring"),
                link = "logit")
model_order_well_led <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Well-led"),
                link = "logit")
model_order_responsive <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Responsive"),
                link = "logit")
ordinal_models <-
  modelsummary(
    list(
      "overall" = model_order_overall,
      "safe" = model_order_safe,
      "effective" = model_order_effective,
      "caring" = model_order_caring,
      "well-led" = model_order_well_led,
      "responsive" = model_order_responsive
    ),
    coef_omit = "region",
    exponentiate = F,
    statistic = "({p.value}) {stars}")
ordinal_models
tinytable_k502zsxz8pkka343ympg
overall safe effective caring well-led responsive
Inadequate|Req improv -3.495 -3.316 -3.785 -3.517
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Req improv|Good -2.506 -1.498 -2.326 -5.341 -2.031 -3.916
(<0.001) *** (0.003) ** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Good|Outstanding 1.424 4.126 2.788 0.669 1.843 1.466
(<0.001) *** (<0.001) *** (<0.001) *** (0.206) (<0.001) *** (0.008) **
clsCLS 0.478 0.588 0.266 -0.330 -0.041 -0.118
(0.076) + (0.092) + (0.466) (0.368) (0.895) (0.764)
spinout -0.378 -0.323 -0.433 -0.184 -0.264 -0.626
(0.068) + (0.344) (0.226) (0.600) (0.377) (0.109)
social_caresocial care 0.090 0.972 0.145 -0.820 0.026 -0.098
(0.734) (0.013) * (0.694) (0.040) * (0.933) (0.801)
age -0.120 -0.042 -0.051 -0.071 -0.025 -0.064
(<0.001) *** (0.386) (0.266) (0.204) (0.563) (0.246)
dissolved1 0.082 -0.724 0.222 0.065 0.188 -0.011
(0.803) (0.085) + (0.657) (0.896) (0.652) (0.983)
Num.Obs. 540 261 261 261 261 261
AIC 906.8 341.4 332.7 274.7 447.6 269.9
BIC 941.1 369.9 361.2 299.7 476.1 294.8
RMSE 2.45 2.17 2.23 1.55 2.42 1.40

models with equity variables

Due to the large range/dispersion of the fiancial data. I standardize the totalequity variable to enable the models to run.

summary(cic_cleaned$totalequity)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -6481922    36481   726352  2042766  3827016  9661966       45

models with total equity

eq_order_overall <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Overall"),
                link = "logit")

eq_order_safe <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Safe"),
                link = "logit")
eq_order_effective <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Effective"),
                link = "logit")
eq_order_caring <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Caring"),
                link = "logit")
eq_order_well_led <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Well-led"),
                link = "logit")
eq_order_responsive <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Responsive"),
                link = "logit")
eq_models <-
  modelsummary(
    list(
      "overall" = eq_order_overall,
      "safe" = eq_order_safe,
      "effective" = eq_order_effective,
      "caring" = eq_order_caring,
      "well-led" = eq_order_well_led,
      "responsive" = eq_order_responsive
    ),
    coef_omit = "region",
    exponentiate = F,
    statistic = "({p.value}) {stars}")
eq_models
tinytable_n48qs45djadyft9jwtr4
overall safe effective caring well-led responsive
Inadequate|Req improv -3.440 -3.426 -3.834 -3.579
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Req improv|Good -2.452 -1.587 -2.369 -5.388 -2.090 -3.960
(<0.001) *** (0.002) ** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Good|Outstanding 1.483 4.083 2.764 0.633 1.801 1.433
(<0.001) *** (<0.001) *** (<0.001) *** (0.230) (<0.001) *** (0.009) **
clsCLS 0.450 0.626 0.297 -0.293 -0.007 -0.083
(0.099) + (0.074) + (0.417) (0.430) (0.982) (0.834)
spinout -0.276 -0.574 -0.584 -0.292 -0.402 -0.735
(0.267) (0.122) (0.132) (0.447) (0.213) (0.085) +
social_caresocial care 0.035 1.253 0.325 -0.698 0.191 0.022
(0.901) (0.003) ** (0.425) (0.109) (0.575) (0.959)
age -0.114 -0.057 -0.060 -0.081 -0.037 -0.073
(<0.001) *** (0.234) (0.189) (0.161) (0.401) (0.197)
dissolved1 0.070 -0.665 0.263 0.095 0.232 0.017
(0.832) (0.115) (0.599) (0.848) (0.579) (0.975)
totalequity_std -0.097 0.332 0.203 0.142 0.188 0.140
(0.455) (0.080) + (0.309) (0.480) (0.263) (0.524)
Num.Obs. 540 261 261 261 261 261
AIC 908.3 340.3 333.6 276.2 448.4 271.5
BIC 946.9 372.4 365.7 304.7 480.4 300.0
RMSE 2.45 2.17 2.23 1.55 2.42 1.40

models with current ratio

current_order_overall <- clm(rating ~ cls  + spinout + social_care + age + dissolved + current_ratio,
                data = filter(cic_cleaned, domain == "Overall"),
                link = "logit")

current_order_safe <- clm(rating ~ cls  + spinout + social_care + age + dissolved + current_ratio,
                data = filter(cic_cleaned, domain == "Safe"),
                link = "logit")
current_order_effective <- clm(rating ~ cls  + spinout + social_care + age + dissolved + current_ratio,
                data = filter(cic_cleaned, domain == "Effective"),
                link = "logit")
current_order_caring <- clm(rating ~ cls  + spinout + social_care + age + dissolved + current_ratio,
                data = filter(cic_cleaned, domain == "Caring"),
                link = "logit")
current_order_well_led <- clm(rating ~ cls  + spinout + social_care + age + dissolved + current_ratio,
                data = filter(cic_cleaned, domain == "Well-led"),
                link = "logit")
current_order_responsive <- clm(rating ~ cls  + spinout + social_care + age + dissolved + current_ratio,
                data = filter(cic_cleaned, domain == "Responsive"),
                link = "logit")
current_models <-
  modelsummary(
    list(
      "overall" = current_order_overall,
      "safe" = current_order_safe,
      "effective" = current_order_effective,
      "caring" = current_order_caring,
      "well-led" = current_order_well_led,
      "responsive" = current_order_responsive
    ),
    coef_omit = "region",
    exponentiate = F,
    statistic = "({p.value}) {stars}")
current_models
tinytable_of4w3xa9up910qrxohpl
overall safe effective caring well-led responsive
Inadequate|Req improv -3.373 -3.234 -3.624 -3.400
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Req improv|Good -2.381 -1.416 -2.162 -5.284 -1.912 -3.826
(<0.001) *** (0.007) ** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Good|Outstanding 1.562 4.220 2.986 0.733 1.977 1.563
(<0.001) *** (<0.001) *** (<0.001) *** (0.180) (<0.001) *** (0.006) **
clsCLS 0.503 0.610 0.324 -0.311 -0.001 -0.082
(0.062) + (0.082) + (0.379) (0.400) (0.998) (0.837)
spinout -0.344 -0.306 -0.398 -0.169 -0.236 -0.606
(0.099) + (0.371) (0.270) (0.631) (0.431) (0.123)
social_caresocial care 0.080 0.953 0.108 -0.841 0.002 -0.120
(0.763) (0.015) * (0.771) (0.037) * (0.994) (0.758)
age -0.120 -0.042 -0.049 -0.070 -0.024 -0.063
(<0.001) *** (0.386) (0.280) (0.207) (0.575) (0.250)
dissolved1 0.114 -0.721 0.237 0.071 0.198 -0.002
(0.731) (0.086) + (0.637) (0.886) (0.634) (0.997)
current_ratio 0.048 0.037 0.063 0.025 0.045 0.033
(0.134) (0.538) (0.186) (0.611) (0.296) (0.483)
Num.Obs. 540 261 261 261 261 261
AIC 906.6 343.0 333.2 276.5 448.6 271.4
BIC 945.2 375.1 365.3 305.0 480.6 299.9
RMSE 2.45 2.17 2.23 1.55 2.42 1.40